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DISCLAIMER 


THIS DOCUMENTATION IS PROVIDED FOR USE WITH LEMOTE PRODUCTS. NO 
LICENSE TO LEMOTE PROPERTY RIGHTS IS GRANTED. LEMOTE ASSUMES NO 
LIABILITY, PROVIDES NO WARRANTY EITHER EXPRESSED OR IMPLIED RELATING 
TO THE USAGE, OR INTELLECTUAL PROPERTY RIGHT INFRINGEMENT EXCEPT AS 
PROVIDED FOR BY LEMOTE TERMS AND CONDITIONS OF SALE. 

LEMOTE PRODUCTS ARE NOT DESIGNED FOR AND SHOULD NOT BE USED IN ANY 
MEDICAL OR LIFE SUSTAINING OR SUPPORTING EQUIPMENT. 

ALL INFORMATION IN THIS DOCUMENT SHOULD BE TREATED AS PRELIMINARY. 
LEMOTE MAY MAKE CHANGES TO THIS DOCUMENT WITHOUT NOTICE. ANYONE 
RELYING ON THIS DOCUMENTATION SHOULD CONTACT LEMOTE FOR THE 
CURRENT DOCUMENTATION AND ERRATA. 

JIANGSU LEMOTE TECHNOLOGY CORPORATION LIMITED 

MENGLAN INDUSTRIAL PARK,YUSHAN,CHANGSHU CITY,JIANGSU PROVINCE,CHINA 

Tel: 0512-52308661 
Fax: 0512-52308688 
Http://www.lemote.com 
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DISCLAIMER.2 

—. SIMDf^^.4 
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—. SlMDi^^ 

SIMD (Single Instruction Multiple Data, 

« SIMD 

1. mmxJl^o mmx 

(8,16,32 {4) 
mmx A 

2. (paired-single,f^#: ps)o#tjE^ opcode yX.ps 

float 

#)o 

3. t:k^Pi£^#i£y:RL±il'o]M6^tft5iI.T, short |j[ 

Id dadd ^a^Wt 

^M7 4 

TJJJaL, SIMD mmx 

H. SIMD3E^#^:^+^ 

mips64 
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ff siMD 


ra. siMD)r^?ii^^:^:/3 5J 

Clii^ WitiJ 4^ WMf t : 

2 . 

pmon start.S 4P mplayer M 

libvo/godson_memcpy.S c 

makefile X\^, 

objdump -mmips:loongson_2f -d Jf^nPXrS^^R^'Cliip 

fXlJ, -|^»XiPT: 

__asm__( 
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".set noreorder\n" 

".set arch=loongson2f\n" 

oooooooo 

".set noreorder\n" 

ilijHj rP 
ifllARP 



); 

A^MT.set noreorderJiJItHioPluStl.set noreorder 

memory 

A 032 P.mim 16 64 

($f0,$f2,$f4...),4P:^^i5ffl$f1 32«i^fflo 

im±, 32 A64m?AWA4lo 


LEN float MM a, b A 

a Mo PjbtfP. c M4PrtMnilMH4PtfH]M, M 
100 A. LEN P)ibfiiko 

test-c.c : 
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simd 


#define LEN 1000000
void main()
{
    float a[LEN] __attribute__((aligned(8))); //i^^ 8
    float b[LEN] __attribute__((aligned(8)));
    int j, i;

    for (j = 0; j < LEN; j++) { // lim^M
        a[j] = 1.2345 + j;
        b[j] = 6.5432 + j;
    }

    for (i = 0; i < 100; i++)
        for (j = 0; j < LEN; j++) {
            a[j] = a[j] + b[j];
        }
}

test-asm.c : 

#define LEN 1000000
void main()
{
    float a[LEN] __attribute__((aligned(8)));
    float b[LEN] __attribute__((aligned(8)));
    int j, i;

    for (j = 0; j < LEN; j++) { //
        a[j] = 1.2345 + j;
        b[j] = 6.5432 + j;
    }

    for (i = 0; i < 100; i++)
        __asm__(
            ".set noreorder\n"
            ".set arch=loongson2f\n"
            "1:\n"
            "slti $8,%2,2\n" // 2
            "bnez $8,2f\n"
            "addiu %2,-2\n"
            "ldc1 $f0,0(%0)\n" //^;t^Sem#M, ^■32j4,
            "ldc1 $f2,0(%1)\n" //ldc1 float, 64 {46^tit
            "add.ps $f0,$f0,$f2\n" // add.ps float
            "sdc1 $f0,0(%0)\n"
            "addiu %0,8\n"
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"b 1b\n"
            "addiu %1,8\n" //
            "2:\n" // 2,^ o sk i
            "beq %2,$0,3f\n"
            "nop\n" // nop±SE
            "lwc1 $f0,0(%0)\n"
            "lwc1 $f2,0(%1)\n"
            "add $f0,$f0,$f2\n"
            "swc1 $f0,0(%0)\n"
            "3:\n"
            ".set reorder\n"
            ::"r"(a),"r"(b),"r"(LEN)
            :"$8","$f0","$f2","memory"
        );

} 

Htli (delay slot), «see mips 

run)) 


F mm 6003 


RAYS-b0f748fa:/tmp# gcc test-c.c -o test-c 

test-c.c: In function ‘main’: 

test-c.c:3: warning: return type of ‘main’ is not ‘int’ 

RAYS-b0f748fa:/tmp# gcc test-asm.c -o test-asm 

test-asm.c: In function ‘main’: 

test-asm.c:3: warning: return type of ‘main’ is not ‘int’ 

RAYS-b0f748fa:/tmp# time ./test-c 
real 0m7.929s 

user 0m7.890s 

sys 0m0.035s 

RAYS-b0f748fa:/tmp# time ./test-asm 
real 0m3.763s 

user 0m3.703s 

sys 0m0.055s 
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£r®4>»t*»i4S#reaia*s simb «♦«»!#« 


^5a-£«jK'(-t*«g)T3iTffl SIMD . SB-t-aftin* 

BPI6ifi^r4A 

gcc test-c.c -o test-c -O2 
gcc test-asm.c -o test-asm -O2 

RAYS-b0f748fa:/tmp# time ./test-c 
real 0m0.004s 
user 0m0.000s 
sys 0m0.004s 

RAYS-b0f748fa:/tmp# time ./test-asm 
real 0m0.093s 
user 0m0.070s 
sys 0m0.023s 




n; 






test-c temp 

RAYS-b0f748fa:/tmp# objdump -mmips:loongson_2f -d test-c > temp 

temp 4*^1^ main, ^31 main 




400658: 2484082c addiu a0,a0,2092 

40065c: 03e00008 jr ra 

400660: 00000000 nop 


00400670 <main>: 

400670: 03e00008 jr ra 

400674: 00000000 nop 


00400680 <__libc_csu_fini>: 

400680: 03e00008 jr ra 

400684: 00000000 nop 
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simd 


itmplayer^’WW^i* 

AA http://dev.lemote.com T^o 
7s. mi^xm 

P:^7 gcc, binutils, iuW: 

%mmmammw±-%zp[r, k ^ aam ± 

apt-get binutils gp^o 

fi^nfbiK^iJ.set arch=loongson2f P^^MIAAt7o 2F 

, btiu madd.ps iA^rJa^ 
('tlii737^^IAA;lf 7 ^WP^o lomote deb 

M 7 f# deb http://dev.lemote.com/debian-loongson loongson 
contrib main non-free,apt-get install binutils SP^ 

-b. 

7^7/7 gdb 

xmnmm xxmm^mpi 

/±EP^AA"M|nl^^: 

1. 32 64 im sll KIM 32 itl, dsll IM 64 

2. ^ M17 il AM IK ^ o ^ AAII ^ M l7 K: ^ M If , bttu 
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char->short,float->double 

3. #-^|'n]Mo btiU IW 

Iwu 

4. imrunMo im int 4, short 

2 , load store 0t-aiao 

5. E0ttii'n]Mo ^ftn 

6. im 6s\\ 

40 dsll32 

Jf ig 0t i-»J31M o 

|itA^n#"addiu %0,8\n"^j^ 
T"addu %0,8\n", r 

3.error: invalid 'asm': operand number out of range 

^P:^#"ldc1 $f2,0(%1)\n"^j^T"ldc1 
$f2,2(%1)\n",3^# ldc1 8 AT. /fc^L±4^E;^^ 
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ff siMD 


tiP:^fi3^#:#"addiu %2,-2\n" 

I'n] 7 7:74 I'n] S'l] ift iih o 

tiP:^#"addiu %2,-2\n" &^%2 Pj(jt%3,||i7P7Mlti^ 3 , 

3 ^:SW%3 (AA^0A#i()o 

W ieee Ai¥DTj^lE^i6^l7AJi[o #^7 JhA 

cpu At^iK 

btm:xyixm, fsis^rPtm^^iAo 

float a[2] __attribute__((aligned(8))),b[2],c[2]; 

a[0]=a[1]=12.2; 

b[0]=b[1]=12.2; 

c[0]=c[1]=12.2; 

__asm__( 

".set arch=loongson2f\n" 

".set noreorder\n" 

"ldc1 $f0,0(%0)\n" 

"ldc1 $f2,0(%1)\n" 

"ldc1 $f4,0(%2)\n" 

"madd.ps $f4,$f0,$f2\n" 

".set reorder\n" 

::"r"(a),"r"(b),"r"(c) 

:"$f0","$f2","memory" 

); 






^#A-Ai|^P«l:6^iika, store ^AWA^i'JIPAi&a, 
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ff siMD 


A. 

top cpu i^^bt, 

oprofile P^ISi5;gi^, cpu iSAA 

oprofile Ptl opannotate (oprofile 

iJ'tliiiIi31SAl'o]M)o opcontrol opreport MA^A^n^/fc 
tbyiA^A^Ao oprofile oprofile P^'g'AA 

if)}o oprofile A A dev.lemote.com apt MAT® o 


punpcklbh/punpcklhw/punpcklwd 

punpckhbh/punpckhhw/punpckhwd 

packsshb/packsswh 

packushb 

pcmpgtb/pcmpgth/pcmpgtw 

pcmpeqb/pcmpeqh/pcmpeqw 

pmaxsh 

pmaxub 

pminsh 
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pminub 

paddb/paddh/paddw/paddd 

paddsb/paddsh 

paddusb/paddush 

psubb/psubh/psubw/psubd 

psubsb/psubsh 

psubusb/psubush 

pmullh 

pmulhh 

pmuluw 

pmulhuh 

pmaddhw 

pandn 

psrlh/psrlw 

psrah/psraw 

psllh/psllw 

pavgb/pavgh 

pshufh 

pmovmskb 

pextrh 

pinsrh_0/pinsrh_1/pinsrh_2/pinsrh_3 
pasubub 
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ff siMD 


biadd 

or 

xor 

nor 

and 

srl 

dsrl 

sra 

dsra 

sll 

dsll 

add 

addu 

dadd 

sub 

subu 

dsub 

seq 

sequ 

slt 

sltu 

sle 
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sleu 

mms 

ADD.ps 

MADD.ps 

MSUB.ps 

NMADD.ps 

NMSUB.ps 

SUB.ps 

NEG.ps 

ABS.ps 

C.F.ps 

C.UN.ps 

C.EQ.ps 

C.UEQ.ps 

C.OLT.ps 

C.ULT.ps 

C.OLE.ps 

C.ULE.ps 

C.SF.ps 

C.NGLE.ps 

C.SEQ.ps 
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simd 


C.NGL.ps 

C.LT.ps 

C.NGE.ps 

C.LE.ps 

C.NGT.ps 

MUL.ps 

MOV.ps 
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